# This script prepares the final dataset for analysis:
#   1/ put together panel of municipal finance data 
#   2/ merge in population, fire exposure, income tax, and HMDA data
#   3/ construct stacked dataset
# Output: final dataset (final.rds), municipality-year panel (muni_panel.rds)

rm(list = ls())

library(tidyverse)
library(readxl)
library(RSQLite)
library(dbplyr)

############## Specify path to data folder ##############
setwd("[FILEPATH HERE]")


############## Extract selected variables from 1991-2002 ##############
main <- data.frame()

for (year in 1991:2002) {
  
  filename = paste0("CityData_1991_2002/CityData", 
                    as.character(year-1), "-", 
                    as.character(year) %>% substr(3, 4), 
                    ".sqlite")
  
  # connect to the SQLite file
  con <- dbConnect(drv = RSQLite::SQLite(), 
                   dbname = filename)
  
  dbListTables(con)
  
  # ENTITIES: Entity Maintenance
  entities <- tbl(con, "ENTITIES") %>% as.data.frame()
  
  # CI_CONS_ASS_LIAB: Consolidated Statement of Assets, Liabilities, and Fund Balances/Working Capital
  cons_ass_liab <- tbl(con, "CI_CONS_ASS_LIAB") %>% as.data.frame()
  
  cons_ass_liab <- cons_ass_liab %>% 
    select(ENTITY_ID, TOT_ASSET = TOT_TOT_ASSET, TOT_LIAB = TOT_LIAB_TOT, 
           TOT_NET_CURR_ASSET, TOT_NET_CURR_LIAB, ACC_LTD_TOT_ASSET, ACC_LTD_LIAB_TOT,
           TOT_WORK_CAP)
  
  # CI_CONS_REV_EXP_FUND: Consolidated Statement of Revenues, Expenditures, and Changes in Fund Balance/Working Capital
  cons_rev_exp_fund <- tbl(con, "CI_CONS_REV_EXP_FUND") %>% as.data.frame()
  
  cons_rev_exp_fund <- cons_rev_exp_fund %>% 
    select(ENTITY_ID, TOT_EXP_TOT, FUNC_TOT, EXP_REV_TOT, 
           GENREV_TOT, GENREV_EXC_OF_GEN_REV, 
           TOT_EXP_GEN_GOV, FUNC_GEN_GOV, EXP_REV_GEN_GOV,
           TOT_EXP_PUB_SAFETY, FUNC_PUB_SAFETY, EXP_REV_PUB_SAFETY,
           TOT_EXP_TRANSP, FUNC_TRANSP, EXP_REV_TRANSP,
           TOT_EXP_COMM_DEVELOP, FUNC_COMM_DEVELOP, EXP_REV_COMM_DEVELOP, 
           TOT_EXP_HEALTH, FUNC_HEALTH, EXP_REV_HEALTH, 
           TOT_EXP_CULT_LEISURE, FUNC_CULT_LEISURE, EXP_REV_CULT_LEISURE,
           TOT_EXP_PUB_UTILITIES, FUNC_PUB_UTILITIES, EXP_REV_PUB_UTILITIES)
  
  
  # CI_REV_TAXES: Taxes
  rev_taxes <- tbl(con, "CI_REV_TAXES") %>% as.data.frame()
  
  rev_taxes <- rev_taxes %>% rowwise() %>%
    mutate(FUNC_TRANSP_TAX = sum(FUNC_TRANSP_TAX_NONTRANSIT, 
                                 FUNC_TRANSP_TAX_TRANSIT, na.rm = T),
           CONST_DEV_TAX = sum(FUNC_CONST_DEV_TAX, 
                               GENREV_CONST_DEV_TAX %>% as.numeric(), na.rm = T),
           GENREV_PROPTAX = sum(GENREV_SEC_UNSEC_PROPTAX,
                                GENREV_SUPP_SEC_UNSEC_PROPTAX %>% as.numeric(),
                                GENREV_PROPTAX_PRIOR %>% as.numeric(), na.rm = T),
           TRANSIENT_LODGING_TAX = sum(GENREV_TRANSIENT_LOGING_TAX, 
                                       FUNC_TRANSIENT_LODGING_TAX, na.rm = T))
  
  rev_taxes <- rev_taxes %>%
    select(ENTITY_ID, GENREV_PROPTAX, GENREV_SALE_USE_TAX, FUNC_TRANSP_TAX, 
           TRANSIENT_LODGING_TAX, GENREV_FRANCHISES, GENREV_BUSINESS_LICENSE_TAX,
           GENREV_REAL_PROP_TRANSFER_TAX, GENREV_UTILITY_USER_TAX, CONST_DEV_TAX, 
           FUNC_TOT_TAXES = FUNC_TOT, GENREV_TOT_TAXES = GENREV_TOT, 
           TOT_REV_TAXES = TOT_REV_TOT)
  
  # CI_REV_CHARGES_CURR_SERV: Current Service Charges
  rev_charges_serv <- tbl(con, "CI_REV_CHARGES_CURR_SERV") %>% as.data.frame()
  
  rev_charges_serv <- rev_charges_serv %>%
    select(ENTITY_ID, FUNC_SP_FIRE_DEPT_SERV, FUNC_CHARGES = FUNC_TOT)
  
  
  # CI_EXP_GEN_GOV: General Government and Public Safety (total, fire and disaster preparedness)
  exp_gen_gov <- tbl(con, "CI_EXP_GEN_GOV") %>% as.data.frame()
  
  exp_gen_gov <- exp_gen_gov %>%
    select(ENTITY_ID, OPEXP_FIRE_PUB, CAP_FIRE_PUB, DEBT_FIRE_PUB, EXP_FIRE_PUB, 
           FUNC_FIRE_PUB, EXPREV_FIRE_PUB, OPEXP_DISASTER_PUB, CAP_DISASTER_PUB, 
           DEBT_DISASTER_PUB, EXP_DISASTER_PUB, FUNC_DISASTER_PUB, EXPREV_DISASTER_PUB)
  
  
  # CI_EXP_PUB_UTILITIES: 
  exp_pub_util <- tbl(con, "CI_EXP_PUB_UTILITIES") %>% as.data.frame()
  
  exp_pub_util <- exp_pub_util %>%
    select(ENTITY_ID, OPEXP_WATER, CAP_WATER, DEBT_WATER, EXP_WATER, FUNC_WATER, 
           EXPREV_WATER, OPEXP_GAS, CAP_GAS, DEBT_GAS, EXP_GAS, FUNC_GAS, 
           EXPREV_GAS, OPEXP_ELECTRIC, CAP_ELECTRIC, DEBT_ELECTRIC, EXP_ELECTRIC, 
           FUNC_ELECTRIC, EXPREV_ELECTRIC)
  
  # CI_REV_FEDERAL_AGENCIES: Intergovernmental - Federal, County, and Other Taxes In-Lieu
  rev_fed_agencies <- tbl(con, "CI_REV_FEDERAL_AGENCIES") %>% as.data.frame()
  
  rev_fed_agencies <- rev_fed_agencies %>% 
    select(ENTITY_ID, FUNC_CO = FUNC_TOT_CO, GENREV_CO = GENREV_TOT_CO, 
           TOT_REV_CO = TOT_REV_TOT_CO, FUNC_COMM_DEV_BLOCK_GRANT, FUNC_FED = FUNC_TOT, 
           GENREV_FED = GENREV_TOT, TOT_REV_FED = TOT_REV_TOT)
  
  # CI_REV_STATE_AGENCIES: Intergovernmental - State
  rev_st_agencies <- tbl(con, "CI_REV_STATE_AGENCIES") %>% as.data.frame()
  
  rev_st_agencies <- rev_st_agencies %>%
    select(ENTITY_ID, FUNC_ST = FUNC_TOT, GENREV_ST = GENREV_TOT, 
           TOT_REV_ST = TOT_REV_TOT)
  
  # putting tables for the same year together
  temp <- entities %>% 
    left_join(cons_ass_liab, by = "ENTITY_ID") %>%
    left_join(cons_rev_exp_fund, by = "ENTITY_ID") %>%
    left_join(rev_taxes, by = "ENTITY_ID") %>%
    left_join(rev_charges_serv, by = "ENTITY_ID") %>%
    left_join(exp_gen_gov, by = "ENTITY_ID") %>%
    left_join(exp_pub_util, by = "ENTITY_ID") %>%
    left_join(rev_fed_agencies, by = "ENTITY_ID") %>%
    left_join(rev_st_agencies, by = "ENTITY_ID")
  
  temp <- temp %>% mutate(FISCAL_YEAR = year)
  
  if (year == 2002) {
    temp <- temp %>% select(-ENTITY_NUMBER, -COUNTY_NUMBER)
  }
  main <- rbind(main, temp)
}

# store dataframe for 1991-2002
main_91_02 <- main

rm(main, con, entities, cons_ass_liab, cons_rev_exp_fund, rev_charges_serv, rev_taxes, 
   exp_gen_gov, exp_transp, exp_health, exp_pub_util, rev_fed_agencies, rev_st_agencies,
   temp)


############## Extract selected variables from 1991-2002 ##############

# ENTITIES
entities <- read_excel("CI_EachDataSet_20180613.xlsx", sheet = "ENTITIES")

entities <- entities %>% 
  select(-`Entity Type`, -`Paper Report Due Date`, -`Electronic Report Due Date`, 
         -City, -State)

names(entities) <- c("ENTITY_NAME", "ENTITY_ID", "COUNTY_NAME",
                     "ZIP", "POPULATION", "CLASS_DATA", "FY_END")

# CI_CONS_ASS_LIAB: Consolidated Statement of Assets, Liabilities, and Fund Balances/Working Capital

cons_ass_liab <- read_excel("CI_EachDataSet_20180613.xlsx", sheet = "CI_CONS_ASS_LIAB")

cons_ass_liab <- cons_ass_liab %>%
  select(ENTITY_ID = `Entity ID`, 
         FISCAL_YEAR = `Fiscal Year`,
         TOT_ASSET = `Total Assets_Total`, 
         TOT_LIAB = `Total Liabilities_Total`,
         TOT_NET_CURR_ASSET = `Net Current Assets_Total`, 
         TOT_NET_CURR_LIAB = `Net Current Liabilities_Total`,
         ACC_LTD_TOT_ASSET = `Total Assets_General Long-Term Debt`, 
         ACC_LTD_LIAB_TOT = `Total Liabilities_General Long-Term Debt`,
         TOT_WORK_CAP = `Working Capital_Total`)


# CI_CONS_REV_EXP_FUND: Consolidated Statement of Revenues, Expenditures, and Changes in Fund Balance/Working Capital

cons_rev_exp_fund <- read_excel("CI_EachDataSet_20180613.xlsx", sheet = "CI_CONS_REV_EXP_FUND")

cons_rev_exp_fund <- cons_rev_exp_fund %>%
  select(ENTITY_ID = `Entity ID`,
         FISCAL_YEAR = `Fiscal Year`,
         TOT_EXP_TOT = `Total_Total Expenditures`, 
         FUNC_TOT = `Total_Functional Revenues`, 
         EXP_REV_TOT = `Total_Net Expenditures/(Excess Revenues)`,
         GENREV_TOT = `Total_General Revenues`, 
         GENREV_EXC_OF_GEN_REV = `Excess/(Deficiency) of General Revenues Over Net Expenditures`,
         TOT_EXP_GEN_GOV = `General Government_Total Expenditures`,
         FUNC_GEN_GOV = `General Government_Functional Revenues`,
         EXP_REV_GEN_GOV = `General Government_Net Expenditures/(Excess Revenues)`,
         TOT_EXP_PUB_SAFETY = `Public Safety_Total Expenditures`, 
         FUNC_PUB_SAFETY = `Public Safety_Functional Revenues`, 
         EXP_REV_PUB_SAFETY = `Public Safety_Net Expenditures/(Excess Revenues)`,
         TOT_EXP_TRANSP = `Transportation_Total Expenditures`, 
         FUNC_TRANSP = `Transportation_Functional Revenues`, 
         EXP_REV_TRANSP = `Transportation_Net Expenditures/(Excess Revenues)`,
         TOT_EXP_COMM_DEVELOP = `Community Development_Total Expenditures`, 
         FUNC_COMM_DEVELOP = `Community Development_Functional Revenues`, 
         EXP_REV_COMM_DEVELOP = `Community Development_Net Expenditures/(Excess Revenues)`, 
         TOT_EXP_HEALTH = `Health_Total Expenditures`, 
         FUNC_HEALTH = `Health_Functional Revenues`, 
         EXP_REV_HEALTH = `Health_Net Expenditures/(Excess Revenues)`, 
         TOT_EXP_CULT_LEISURE = `Culture and Leisure_Total Expenditures`, 
         FUNC_CULT_LEISURE = `Culture and Leisure_Functional Revenues`, 
         EXP_REV_CULT_LEISURE = `Culture and Leisure_Net Expenditures/(Excess Revenues)`,
         TOT_EXP_PUB_UTILITIES = `Public Utilities_Total Expenditures`, 
         FUNC_PUB_UTILITIES = `Public Utilities_Functional Revenues`, 
         EXP_REV_PUB_UTILITIES = `Public Utilities_Net Expenditures/(Excess Revenues)`)

# CI_REV_TAXES: Taxes
rev_taxes <- read_excel("CI_EachDataSet_20180613.xlsx", sheet = "CI_REV_TAXES")

rev_taxes <- rev_taxes %>% rowwise() %>%
  mutate(FUNC_TRANSP_TAX = 
           sum(`Transportation Tax - Transit_Functional Revenues`,
               `Transportation Tax - Non Transit_Functional Revenues`,
               na.rm = T),
         CONST_DEV_TAX = 
           sum(`Construction Development Taxes_General Revenues` %>% as.numeric(),
               `Construction Development Taxes_Functional Revenues` %>% as.numeric(),
               na.rm = T),
         GENREV_PROPTAX = 
           sum(`Supplemental Roll Secured and Unsecured Property Tax_General Revenues` %>% 
                 as.numeric(),
               `Secured and Unsecured Property Tax_General Revenues` %>% as.numeric(),
               `Property Taxes - Prior_General Revenues` %>% as.numeric(),
               na.rm = T),
         TRANSIENT_LODGING_TAX = 
           sum(`Transient Lodging Taxes_General Revenues` %>% as.numeric(),
               `Transient Lodging Taxes_Functional Revenues`, na.rm = T))

rev_taxes <- rev_taxes %>%
  select(ENTITY_ID = `Entity ID`, 
         FISCAL_YEAR = `Fiscal Year`,
         GENREV_PROPTAX,
         GENREV_SALE_USE_TAX = `Sales and Use Taxes_General Revenues`, 
         FUNC_TRANSP_TAX, 
         TRANSIENT_LODGING_TAX = `Transient Lodging Taxes_General Revenues`,
         GENREV_FRANCHISES = `Franchises_General Revenues`, 
         GENREV_BUSINESS_LICENSE_TAX = `Business License Taxes_General Revenues`,
         GENREV_REAL_PROP_TRANSFER_TAX = `Real Property Transfer Taxes_General Revenues`, 
         GENREV_UTILITY_USER_TAX = `Utility Users Taxes_General Revenues`,
         CONST_DEV_TAX,
         FUNC_TOT_TAXES = `Total_Functional Revenues`, 
         GENREV_TOT_TAXES = `Total_General Revenues`, 
         TOT_REV_TAXES = `Total_Total Revenues`)


# CI_EXP_GEN_GOV: General Government and Public Safety (for fire and disaster preparedness expenditures)
exp_gen_gov <- read_excel("CI_EachDataSet_20180613.xlsx", sheet = "CI_EXP_GEN_GOV")

exp_gen_gov <- exp_gen_gov %>%
  select(ENTITY_ID = `Entity ID`, 
         FISCAL_YEAR = `Fiscal Year`,
         OPEXP_FIRE_PUB = `Fire_Operating Expenditures`, 
         CAP_FIRE_PUB = `Fire_Capital Outlay`, 
         DEBT_FIRE_PUB = `Fire_Debt Service`, 
         EXP_FIRE_PUB = `Fire_Total Expenditures`, 
         FUNC_FIRE_PUB = `Fire_Functional Revenues`, 
         EXPREV_FIRE_PUB = `Fire_Net Expenditures/(Excess Revenues)`,
         OPEXP_DISASTER_PUB = `Disaster Preparedness_Operating Expenditures`, 
         CAP_DISASTER_PUB = `Disaster Preparedness_Capital Outlay`, 
         DEBT_DISASTER_PUB = `Disaster Preparedness_Debt Service`,
         EXP_DISASTER_PUB = `Disaster Preparedness_Total Expenditures`, 
         FUNC_DISASTER_PUB = `Disaster Preparedness_Functional Revenues`, 
         EXPREV_DISASTER_PUB = `Disaster Preparedness_Net Expenditures/(Excess Revenues)`)


# CI_EXP_PUB_UTILITIES: 

exp_pub_util <- read_excel("CI_EachDataSet_20180613.xlsx", sheet = "CI_EXP_PUB_UTILITIES")

exp_pub_util <- exp_pub_util %>% 
  select(ENTITY_ID = `Entity ID`,
         FISCAL_YEAR = `Fiscal Year`,
         OPEXP_WATER = `Water_Operating Expenditures`,
         CAP_WATER = `Water_Capital Outlay`,
         DEBT_WATER = `Water_Debt Service`,
         EXP_WATER = `Water_Total Expenditures`,
         FUNC_WATER = `Water_Functional Revenues`,
         EXPREV_WATER = `Water_Net Expenditures/(Excess Revenues)`,
         OPEXP_GAS = `Gas_Operating Expenditures`,
         CAP_GAS = `Gas_Capital Outlay`,
         DEBT_GAS = `Gas_Debt Service`,
         EXP_GAS = `Gas_Total Expenditures`,
         FUNC_GAS = `Gas_Functional Revenues`,
         EXPREV_GAS = `Gas_Net Expenditures/(Excess Revenues)`,
         OPEXP_ELECTRIC = `Electric_Operating Expenditures`,
         CAP_ELECTRIC = `Electric_Capital Outlay`,
         DEBT_ELECTRIC = `Electric_Debt Service`,
         EXP_ELECTRIC = `Electric_Total Expenditures`,
         FUNC_ELECTRIC = `Electric_Functional Revenues`,
         EXPREV_ELECTRIC = `Electric_Net Expenditures/(Excess Revenues)`)


# CI_REV_FEDERAL_AGENCIES: Intergovernmental - Federal, County, and Other Taxes In-Lieu

rev_fed_agencies <- read_excel("CI_EachDataSet_20180613.xlsx", sheet = "CI_REV_FEDERAL_AGENCIES")

rev_fed_agencies <- rev_fed_agencies %>% 
  select(ENTITY_ID = `Entity ID`,
         FISCAL_YEAR = `Fiscal Year`,
         FUNC_CO = `Total_Functional Revenues_Intergovernmental - County`,
         GENREV_CO = `Total _General Revenues_Intergovernmental - County`,
         TOT_REV_CO = `Total _Total Revenues_Intergovernmental - County`,
         FUNC_COMM_DEV_BLOCK_GRANT = `Community Development Block Grant_Functional Revenues`,
         FUNC_FED = `Total _Functional Revenues_Intergovernmental - Federal`,
         GENREV_FED = `Total _General Revenues_Intergovernmental - Federal`,
         TOT_REV_FED = `Total _Total Revenues_Intergovernmental - Federal`)


# CI_REV_STATE_AGENCIES: Intergovernmental - State

rev_st_agencies <- read_excel("CI_EachDataSet_20180613.xlsx", sheet = "CI_REV_STATE_AGENCIES")

rev_st_agencies <- rev_st_agencies %>%
  select(ENTITY_ID = `Entity ID`, 
         FISCAL_YEAR = `Fiscal Year`,
         FUNC_ST = `Total_Functional Revenues`,
         GENREV_ST = `Total_General Revenues`,
         TOT_REV_ST = `Total_Total Revenues`)


# CI_REV_CHARGES_CURR_SERV: Current Service Charges

rev_charges_serv <- read_excel("CI_EachDataSet_20180613.xlsx", sheet = "CI_REV_CHARGES_CURR_SERV")

rev_charges_serv <- rev_charges_serv %>%
  select(ENTITY_ID = `Entity ID`,
         FISCAL_YEAR = `Fiscal Year`,
         FUNC_SP_FIRE_DEPT_SERV = `Special Fire Department Services_Functional Revenues`, 
         FUNC_CHARGES = `Total_Functional Revenues`)

# put tables together
main <- cons_ass_liab %>% 
  left_join(cons_rev_exp_fund, by = c("ENTITY_ID", "FISCAL_YEAR")) %>%
  left_join(rev_taxes, by = c("ENTITY_ID", "FISCAL_YEAR")) %>%
  left_join(exp_gen_gov, by = c("ENTITY_ID", "FISCAL_YEAR")) %>%
  left_join(rev_charges_serv, by = c("ENTITY_ID", "FISCAL_YEAR")) %>%
  left_join(exp_pub_util, by = c("ENTITY_ID", "FISCAL_YEAR")) %>%
  left_join(rev_fed_agencies, by = c("ENTITY_ID", "FISCAL_YEAR")) %>%
  left_join(rev_st_agencies, by = c("ENTITY_ID", "FISCAL_YEAR")) 

main[, c(3:89)] <- sapply(main[, c(3:89)], as.numeric)

main <- main %>% left_join(entities, by = "ENTITY_ID")
main <- main[, c(1:2, 90:95, 3:89)]

# store dataframe for 2003-2016
main_03_16 <- main
rm(main, entities, cons_ass_liab, cons_rev_exp_fund, exp_gen_gov, rev_charges_serv,
   rev_taxes, rev_fed_agencies, rev_st_agencies, exp_pub_util)



############## Append the two municipal finance dataframes together ##############

list_entities <- left_join(
  main_03_16 %>% group_by(ENTITY_ID) %>% 
    summarize(ZIP = unique(ZIP)[1], FY_END = unique(FY_END)[1]),
  main_91_02 %>% group_by(ENTITY_ID) %>% 
    summarize(BEGINNING_YEAR = unique(BEGINNING_YEAR)[1]))

main <- rbind(main_03_16 %>% select(-ZIP, -POPULATION, -FY_END), 
              main_91_02 %>% select(-FEDERAL_CENSUS_POPULATION, 
                             -FEDERAL_CENSUS_POPULATION_2000,
                             -BEGINNING_YEAR))

main <- main %>% left_join(list_entities, by = "ENTITY_ID")

main <- main[, c(1:5, 93:95, 6:92)]

# drop completely empty observations
main$na_count <- apply(main[, c(9:95)], 1, function(x) sum(is.na(x)))
table(main$na_count, useNA = "always")
main <- main %>% filter(na_count < 87) %>% select(-na_count)

table(main$FISCAL_YEAR, useNA = "always")

rm(main_91_02, main_03_16, list_entities)



############## Merge in population data ##############

# 1990-2000
pop1 <- read_excel("E-4_90-00_Rpt.xls", sheet = "Table 2  City Pop Est")

pop1 <- pop1[c(11:657), c(1, 3:12)]

names(pop1) <- c("City", "1991", "1992", "1993", "1994", "1995", 
                 "1996", "1997", "1998", "1999", "2000")

pop1 <- pop1 %>% filter(!is.na(`2000`), City != "Balance Of County")

pop1[, c(2:11)] <- sapply(pop1[, c(2:11)], as.numeric)

# 2001-2010
pop2 <- read_excel("E4_2000-2010_Report_Final_EOC_000.xls", sheet = "Table 2 City County")

pop2 <- pop2[c(11:766), c(1, 3:12)]

names(pop2) <- c("City", "2001", "2002", "2003", "2004", "2005", 
                 "2006", "2007", "2008", "2009", "2010")

pop2 <- pop2 %>% filter(!is.na(`2010`), 
                        City != "Balance Of County",
                        City != "County Total",
                        City != "Incorporated",
                        City != "Trinity County",
                        City != "Alpine County",
                        City != "Mariposa County")

pop2[, c(2:11)] <- sapply(pop2[, c(2:11)], as.numeric)

# 2011-2020
pop3 <- read_excel("E-4_2019InternetVersion.xls", sheet = "Table 2 City County")

pop3 <- pop3[c(11:774), c(1, 3:8)]

names(pop3) <- c("City", "2011", "2012", "2013", "2014", "2015", "2016")

pop3 <- pop3 %>% filter(!is.na(`2016`), 
                        City != "Balance Of County",
                        City != "County Total",
                        City != "Incorporated",
                        City != "Trinity County",
                        City != "Alpine County",
                        City != "Mariposa County")

pop3[, c(2:7)] <- sapply(pop3[, c(2:7)], as.numeric)

# merge together
pop <- full_join(pop1, pop2, by = "City")
pop <- full_join(pop, pop3, by = "City")

# reshape long
pop <- pop %>% gather(key = "FISCAL_YEAR", value = "POP", -"City") %>% 
  rename(ENTITY_NAME = City)

pop$FISCAL_YEAR <- pop$FISCAL_YEAR %>% as.numeric()

# merge with main dataframe
main$ENTITY_NAME <- main$ENTITY_NAME %>% as.character()
main$ENTITY_NAME[main$ENTITY_NAME == "Angels"] <- "Angels City"
main$ENTITY_NAME[main$ENTITY_NAME == "Mt. Shasta"] <- "Mount Shasta"
main$ENTITY_NAME[main$ENTITY_NAME == "St. Helena"] <- "St Helena"
main$ENTITY_NAME[main$ENTITY_NAME == "McFarland"] <- "Mcfarland"
main <- left_join(main, pop, by = c("ENTITY_NAME", "FISCAL_YEAR"))

rm(pop1, pop2, pop3, pop)


############## Merge with fire event and service category ##############

# fires in 2000 happen during the 2000-2001 fiscal year
main <- main %>% mutate(FISCAL_YEAR = FISCAL_YEAR - 1)

# create a crosswalk

list_entities <- main %>% group_by(ENTITY_ID) %>%
  summarize(ENTITY_NAME = unique(ENTITY_NAME)[1]) 

list_entities$ENTITY_NAME <- list_entities$ENTITY_NAME %>% 
  str_replace("Fort ", "Ft ") 

list_entities <- list_entities %>% arrange(ENTITY_NAME)

# merge place code

munilist <- read_csv("CalMuniList.csv")

munilist <- munilist %>% filter(Name != "CABAZON CITY") %>% arrange(Name) 

list_entities <- cbind(list_entities, munilist %>% select(FIPS.Code, FIPS.County.Code))

main <- left_join(main, list_entities %>% select(ENTITY_ID, FIPS.Code, FIPS.County.Code), by = "ENTITY_ID")

rm(munilist, list_entities)

# fire data in place-year observations

fires <- read_csv("place_panel.csv")

fires <- fires %>% mutate(FIPS.Code = paste("06-", place, sep = "")) %>%
  rename(FISCAL_YEAR = FIRE_YEAR) %>%
  select(-place)

muniyear <- left_join(main, fires, by = c("FIPS.Code", "FISCAL_YEAR"))

# service categories

service <- read_excel("PrT16p.xlsx", sheet = "GRV", range = "A18:AG500")

service <- service %>% 
  select(City, ...33) %>%
  rename(ENTITY_NAME = City, SERVICE = ...33)

service$ENTITY_NAME[service$ENTITY_NAME == "Angels Camp"] <- "Angels City"
service$ENTITY_NAME[service$ENTITY_NAME == "Carmel"] <- "Carmel-By-The-Sea"
service$ENTITY_NAME[service$ENTITY_NAME == "Saint Helena"] <- "St Helena"

muniyear <- left_join(muniyear, service, by = "ENTITY_NAME")

rm(main, fires, service)



############## Clean and merge in income tax data ##############

# read raw data
tax_zip <- read_csv("Personal_Income_Tax_Statistics_By_Zip_Code.csv")

# zip code level data
names(tax_zip) <- c("TaxYear", "Zipcode", "State", "City", "County",
                    "Returns", "AGI", "TaxLiab", "CntyLat", "CntyLon",
                    "GeoCnty", "GeoCity", "GeoZip")
tax_zip <- tax_zip %>% arrange(Zipcode, TaxYear)

# collapse to city level
tax_zip$City[tax_zip$City == "El Dorado hills"] <- "El Dorado Hills"

tax_city <- tax_zip %>% group_by(City, TaxYear) %>%
  summarize(Returns = sum(Returns/1000, na.rm = T),
            AGI = sum(AGI/1000, na.rm = T),
            TaxLiab = sum(TaxLiab/1000, na.rm = T),
            GeoCity = unique(GeoCity)[1],
            County = unique(County)[1])

# generate city id
tax_city <- tax_city %>% group_by(City) %>% 
  mutate(CityId = group_indices())

# manual correction of city names for merging
temp1 <- tax_city %>% group_by(City) %>%
  summarize(CityId = unique(CityId)[1])

temp1$City[temp1$City == "Amador City"] <- "Amador"
temp1$City[temp1$City == "Angels Camp"] <- "Angels City"
temp1$City[temp1$City %in% c("Belvedere Tiburon", "Bel Tiburon")] <- "Belvedere"
temp1$City[temp1$City == "Carmel By The Sea"] <- "Carmel-By-The-Sea"
temp1$City[temp1$City == "Del Rey"] <- "Del Rey Oaks"
temp1$City[temp1$City == "City Of Industry"] <- "Industry"
temp1$City[temp1$City == "Los Altos"] <- "Los Altos Hills"
temp1$City[temp1$City == "McFarland"] <- "Mcfarland"
temp1$City[temp1$City == "Palos Verdes Peninsula"] <- "Palos Verdes Estates"
temp1$City[temp1$City == "Saint Helena"] <- 	"St Helena"

# create a crosswalk
temp2 <- muniyear %>% group_by(ENTITY_ID) %>%
  summarize(ENTITY_NAME = unique(ENTITY_NAME)[1],
            COUNTY_NAME = unique(COUNTY_NAME)[1],
            ZIP = unique(ZIP)[1])

temp <- temp2 %>% 
  left_join(temp1, by = c("ENTITY_NAME" = "City"))            

# merge back the crosswalk
tax_city <- tax_city %>% left_join(temp, by = "CityId")

# filter out observations without a match
tax_city <- tax_city %>% filter(!is.na(ENTITY_ID))

tax_city <- tax_city %>% mutate(FISCAL_YEAR = TaxYear + 1) %>%
  select(ENTITY_ID, FISCAL_YEAR, Returns, AGI, TaxLiab)

muniyear <- muniyear %>% left_join(tax_city, by = c("ENTITY_ID", "FISCAL_YEAR"))

rm(tax_zip, tax_city, temp, temp1, temp2)


############## Merge in HMDA data ##############

hmda <- read_csv("hmda_panel.csv")

hmda <- hmda %>% mutate(FISCAL_YEAR = year + 1,
                        FIPS.Code = paste("06-", placefp, sep = "")) 

muniyear <- muniyear %>% left_join(hmda, by = c("FIPS.Code", "FISCAL_YEAR"))

rm(hmda, incometax)

write_rds(muniyear, "muni_panel.rds")

############## Create stacked dataset ##############

# treated dataset
treated <- muniyear %>% 
  filter(percent_fire > 0.10, 
         FISCAL_YEAR >= 1995) %>% 
  arrange(desc(percent_fire)) %>%
  filter(ENTITY_NAME != "Fillmore" | as.character(FISCAL_YEAR) == "2003",
         ENTITY_NAME != "Malibu" | as.character(FISCAL_YEAR) == "2007",
         ENTITY_NAME != "Moorpark" | as.character(FISCAL_YEAR) == "2003") %>% 
  select(ENTITY_ID, FISCAL_YEAR)

# create the stacked sample
main <- data.frame()

for (i in 1:nrow(treated)) {
  treat <- treated[i,]
  if (treat$FISCAL_YEAR < 2011) {
    control <- treated %>% filter(FISCAL_YEAR >= treat$FISCAL_YEAR + 5)
    temp <- rbind(treat, control) %>% 
      mutate(event = i,
             eventyear = treat$FISCAL_YEAR,
             fire = 1*(FISCAL_YEAR == eventyear)) %>%
      select(-FISCAL_YEAR)
    temp <- muniyear %>% left_join(temp, by = "ENTITY_ID")
    temp <- temp %>% filter(FISCAL_YEAR >= eventyear-5,
                            FISCAL_YEAR <= eventyear+4)
    main <- rbind(main, temp)
  }
}

############## Saving final dataset ##############
write_rds(main, "final_data.rds")


